home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
FishMarket 1.0
/
FishMarket v1.0.iso
/
fishies
/
101-125
/
disk_102
/
match_stuff
/
match.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-05-06
|
13KB
|
442 lines
/* TRIPOS Pattern Matching algorithm with extensions */
/*
* Original BCPL version by Martin Richards
* This is a transliteration of the BCPL code with additions
* by Pete Goodeve 87:8:10
*
* This version has:
* simple repetition ("*") (alternative to "#?")
* negation ("~" and "||")
* slicing ("^")
*/
/*
* Reference:
*
* Martin Richards,
* "A Compact Function for Regular Expression
* Pattern Matching"
* [Software--Practice and Experience, Vol 9, 527-534 (1979)]
*/
/* don't #define DEBUG 1 */
/* ...there are large hunks of optional trace code in this version */
#include <exec/types.h>
/* Code bits returned by Pattern Compiler: */
/* this bit always set unless pattern is bad: */
#define CODE_OK 1
/* the following bits will not be set if pattern is bad: */
/* set if any single-wild-match characters present ("?"): */
#define CODE_ANY 2
/* set if any multiple=-wild-match characters present ("*"): */
#define CODE_ALL 4
/* set if any grouping characters present ("()"): */
#define CODE_GROUP 8
/* set if pattern has alternations ("|"): */
#define CODE_ALT 16
/* set if pattern has repeating segments ("#" or "*"): */
#define CODE_REP 32
/* set if pattern has negation ("~" or "||"): */
#define CODE_NEG 64
/* set if pattern is sliced ("^"): */
#define CODE_SLICE 128
/* set if more than MAXMARK slices or if used within parentheses */
#define CODE_OVERSLICE CODE_SLICE | 256
/* Pattern Control Characters are defined here so you can change them
* easily.
* You may undefine PAT_ALL or PAT_SLICE to remove the sections of code
* that provide these functions; the other definitions may be changed but
* not removed.
*/
#define PAT_QUOTE '\''
#define PAT_LGROUP '('
#define PAT_RGROUP ')'
#define PAT_ALT '|'
#define PAT_REP '#'
#define PAT_ANY '?'
#define PAT_ALL '*'
#define PAT_NEG '~'
/* -- note double PAT_ALT is also always a negation code */
#define PAT_NULL '%'
#define PAT_SLICE '^'
#define Endstreamch 0xFF
#define WORKSIZE 128
/*** Static Variables: ***/
UBYTE *Work = NULL;
int Wp = 0, Succflag = FALSE;
char *Pat = 0;
UBYTE *Aux = 0;
UBYTE Ch = 0;
int PatP = 0, Patlen = 0,
Errflag = FALSE,
NegP = 0;
UBYTE *NWork = NULL;
#ifdef PAT_SLICE -------------------------v
#define MAXMARK 4
#define MAXCUT 16
struct MarkSet {UBYTE mk[MAXMARK];};
UBYTE *Svect = NULL;
int cutlimit;
struct MarkSet MarkP, *MWork;
#endif -----------------------------------^
int S, StrLength;
#ifdef DEBUG ==============================V
int DEBmode = TRUE; /* set this false for no printout when DEBUG defined */
int ii; /* local variable -- defined here for convenience */
#endif ====================================^
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/*** The Interpreter: ***/
#ifdef PAT_SLICE --------------------------v
SMatch(Pat, Aux, Str, Slice) UBYTE *Aux; char *Pat, *Str, *Slice;
#else ------------------------------------
Match(Pat, Aux, Str) UBYTE *Aux; char *Pat, *Str;
#endif ------------------------------------^
{
UBYTE W[WORKSIZE];
UBYTE NegW[WORKSIZE]; /* for negation */
#ifdef PAT_SLICE --------------------------v
struct MarkSet MarkW[WORKSIZE]; /* for slice marking */
#endif ------------------------------------^
register int N, I;
register UBYTE P, Q;
char K;
StrLength = strlen(Str);
S = 0;
Pat -= 1; /* align into "BCPL" form (byte 0 never accessed) */
Work = W;
*Work = 0; /* remote possibility of first Put failing otherwise */
NWork = NegW;
#ifdef PAT_SLICE ---------------------------v
MWork = MarkW;
for (I=0; I < MAXMARK; I++) {
MWork->mk[I] = MarkP.mk[I] = 0;
}
if (Svect = Slice) /* allowed to be NULL */
*Svect = 0;
cutlimit = MAXCUT;
#endif -------------------------------------^
Wp = 0;
Succflag = FALSE;
NegP = 0;
Put(1);
if (!(Aux[0]==0)) Put(Aux[0]);
do {
for (N=1; N <= Wp; N++) { /* first complete the closure: */
P = Work[N];
NegP = NWork[N] & 1; /* propagate low bit only */
#ifdef PAT_SLICE ---------------------------v
MarkP = MWork[N];
#endif -------------------------------------^
K = Pat[P]; Q = Aux[P];
switch(K)
{
case PAT_REP: Put(P+1);
#ifdef PAT_ALL -----------------------------v
case PAT_ALL:
#endif -------------------------------------^
case PAT_NULL: Put(Q);
default: break;
case PAT_LGROUP:
case PAT_ALT:
if (Q != 0) Put(Q);
if (Pat[P+1] == '|') { /* Negative alternate */
NegP = 1;
P++;
}
Put(P+1);
break;
case PAT_NEG:
Put(Q);
NegP = 1;
if (!(NWork[N] & 2)) Put(P+1);
break;
#ifdef PAT_SLICE ----------------------------v
case PAT_SLICE:
Putslice(P, S);
Put(Q);
#endif --------------------------------------^
}
}
#ifdef DEBUG ====================================V
if (DEBmode) {
printf("\nSucc=%d Closure Vector (len=%d):\n", Succflag, Wp);
for (ii=1; ii<=Wp; ii++) printf("%3d", Work[ii]);
putchar('\n');
for (ii=1; ii<=Wp; ii++) printf("%3d", NWork[ii]);
puts("\n=====");
}
#endif ==========================================^
if (S >= StrLength)
return (Succflag > 0);
if (Wp == 0) return FALSE;
Ch = Str[S++];
#ifdef DEBUG ====================================V
if (DEBmode)
printf("Matching character %c:\n",Ch);
#endif ==========================================^
/* now deal with match items: */
N = Wp;
Wp = 0;
Succflag = FALSE;
for (I = 1; I <= N; I++) {
P = Work[I];
NegP = NWork[I];
#ifdef PAT_SLICE ------------------------------v
MarkP = MWork[I];
#endif ----------------------------------------^
K = Pat[P];
switch(K) {
case PAT_NEG: NegP |= 2; Put(P);
#ifdef PAT_SLICE ------------------------------v
case PAT_SLICE:
#endif ----------------------------------------^
case PAT_REP:
case PAT_ALT:
case PAT_NULL:
case PAT_LGROUP:
break; /* BCPL was 'loop' */
case PAT_QUOTE: K = Pat[P+1];
default: if (Ch != K) break; /* 'loop' */
case PAT_ANY: /* successful match */
Put(Aux[P]);
break; /* 'loop' */
#ifdef PAT_ALL --------------------------------v
case PAT_ALL: Put(P); /* point back to self...*/
break; /* 'loop' */
#endif ----------------------------------------^
}
}
#ifdef DEBUG ======================================V
if (DEBmode) {
printf("\nSucc=%d Match Vector (len=%d):\n", Succflag, Wp);
for (ii=1; ii<=Wp; ii++) printf("%3d", Work[ii]);
putchar('\n');
for (ii=1; ii<=Wp; ii++) printf("%3d", NWork[ii]);
puts("\n=====");
}
#endif ============================================^
} while (TRUE);
} /*** end Match ***/
Put(N) int N;
{
UBYTE *W, *Wlim, *NW;
#ifdef DEBUG ======================================V
if (DEBmode)
#ifdef PAT_SLICE ------------------------------v
printf("Put %d[%d, %d/%d] ", N, NegP, MarkP.mk[0], MarkP.mk[1]);
#else ----------------------------------------
printf("Put %d[%d] ", N, NegP);
#endif ----------------------------------------^
#endif ============================================^
if (N == 0) {
Succflag |= NegP ? -1 : 1;
#ifdef PAT_SLICE ------------------------------v
if (S == StrLength) RetSlice(); /* matched -- pass back slice info */
#endif ----------------------------------------^
}
else {
W = Work;
Wlim = W + Wp;
NW = NWork;
for(;W <= Wlim; W++, NW++)
if ((*W == N)&&(*NW == NegP))
return;
*W = N;
*NW = NegP;
Wp = W - Work;
#ifdef PAT_SLICE ------------------------------v
MWork[Wp] = MarkP;
#endif ----------------------------------------^
if (Wp >= WORKSIZE) exit(33);
}
}
#ifdef PAT_SLICE -----------------------------------------------v
Putslice(P, S) int P, S;
{
int i;
UBYTE *MW;
#ifdef DEBUG ======================================V
if (DEBmode)
printf("Slice ^%d=%d ", P, S);
#endif ============================================^
for (i = 0, MW = (UBYTE *)MWork;
i<MAXMARK && *MW && (P != *MW); i++, MW++)
; /* loop */
if (i==MAXMARK) return;
*MW = P;
MarkP.mk[i] = S;
}
RetSlice() {
int i;
UBYTE *mwp = MWork->mk,
*mpp = MarkP.mk,
lastcut = 0;
if (!Svect) return; /* ignore if NULL */
#ifdef DEBUG ======================================V
if (DEBmode)
printf("RetSlice.. Svect = %d\n",Svect);
#endif ============================================^
for (i=MAXMARK; i && *mwp && cutlimit; i--, cutlimit--)
if(*mpp >= lastcut) { /* avoid supefluous cuts */
*Svect++ = *mwp++;
lastcut = *Svect++ = *mpp++;
}
*Svect = 0;
}
#endif ---------------------------------------------------------^
/*********************************************************************/
/*** The Compiler: ***/
int retcode, grouplevel, slicecount;
Rch() {
if (PatP >= Patlen)
Ch = Endstreamch;
else {
Ch = Pat[++PatP];
}
}
Nextitem() {
if (Ch == PAT_QUOTE) Rch();
Rch();
}
int Prim(NegMark) int NegMark;
{
int A = PatP;
UBYTE Op = Ch;
if (NegMark) retcode |= CODE_NEG;
Nextitem();
switch(Op) {
case PAT_ANY:
retcode |= CODE_ANY;
return A;
case PAT_ALL:
retcode |= CODE_ALL;
return A;
case Endstreamch:
case PAT_RGROUP:
case PAT_ALT:
Errflag = TRUE;
default: return A;
case PAT_REP:
Setexits(Prim(NegMark), A);
retcode |= CODE_REP;
return A;
case PAT_LGROUP:
grouplevel++;
A = Exp(A, (NegMark ? 1 : FALSE));
grouplevel--;
if (Ch != PAT_RGROUP) Errflag = TRUE;
retcode |= CODE_GROUP;
Nextitem();
return A;
case PAT_NEG:
if (NegMark) Errflag = TRUE;
NegMark = 1;
retcode |= CODE_NEG;
Join(A, Prim(NegMark));
NegMark = FALSE;
return A;
#ifdef PAT_SLICE ------------------------------v
case PAT_SLICE:
retcode |= (++slicecount > MAXMARK || grouplevel ) ?
CODE_OVERSLICE : CODE_SLICE;
return A;
#endif ----------------------------------------^
}
}
int Exp(AltP, NegMark) int AltP, NegMark;
{
int Exits = 0, A;
do {
A = Prim(NegMark);
if (Ch == PAT_ALT || Ch == PAT_RGROUP || Ch == Endstreamch)
{ Exits = Join(Exits, A);
if (Ch != PAT_ALT)
return Exits;
retcode |= CODE_ALT;
Aux[AltP] = PatP;
AltP = PatP;
Nextitem();
if (Ch == PAT_ALT) { /* negation convention */
if (NegMark == 1) Errflag = TRUE;
NegMark = -1; /* not an error to meet oneself */
retcode |= CODE_NEG;
Nextitem();
}
else NegMark = FALSE;
}
else Setexits(A, PatP);
} while (TRUE);
}
Setexits(List, Val) int List, Val;
{
int A;
while (List != 0) {
A = Aux[List];
Aux[List] = Val;
List = A;
}
}
int Join(A, B) int A, B;
{
int T = A;
if (A == 0) return B;
while (Aux[A] != 0) A = Aux[A];
Aux[A] = B;
return T;
}
int CmplPat(Pattern, CmplPattern) char *Pattern, *CmplPattern;
{
int I;
Pat = Pattern - 1;
Aux = CmplPattern;
Patlen = strlen(Pattern); /** this is no longer a BSTR!!! **/
PatP = 0;
Errflag = FALSE;
retcode = CODE_OK;
grouplevel = slicecount = 0;
for (I = 0; I <= Patlen; I++) Aux[I] = 0;
Rch();
Setexits(Exp(0,FALSE), 0);
return Errflag ? 0 : retcode;
}